knitr::opts_chunk$set(echo = TRUE)
library(knitr)
library(ggplot2)
library(reshape2)
library(caret)
## Loading required package: lattice
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(caTools)
#library(pls) # pls
library(MASS) #lda
library(naivebayes) # naivebayes
## naivebayes 0.9.6 loaded
library(ipred); library(plyr); library(e1071) # bagged tree
library(randomForest) #rf
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(gbm) # gbm
## Loaded gbm 2.1.5
#library(xgboost) #xgboost

#Data Input, preparation and descriptives ##Import the dataframe of patient parameter and NGC expression

ctmmMir <- read.delim(file = "CTMM_miR_all.txt", stringsAsFactors = FALSE)
str(ctmmMir[,1:6])
## 'data.frame':    396 obs. of  6 variables:
##  $ Sorting.index   : int  313 4 5 6 316 7 29 30 336 337 ...
##  $ Numeric_Pat_code: int  9 10 12 14 15 16 57 58 59 60 ...
##  $ Hospital        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ CC_number       : int  10045 10046 10048 10050 10051 10052 10107 10108 10109 10110 ...
##  $ Sex             : int  0 1 1 1 0 1 1 0 1 0 ...
##  $ Race            : int  1 1 1 1 1 1 1 1 1 1 ...
kable(head(ctmmMir[,1:6]))
Sorting.index Numeric_Pat_code Hospital CC_number Sex Race
313 9 1 10045 0 1
4 10 1 10046 1 1
5 12 1 10048 1 1
6 14 1 10050 1 1
316 15 1 10051 0 1
7 16 1 10052 1 1
names(ctmmMir)
##   [1] "Sorting.index"                    "Numeric_Pat_code"                
##   [3] "Hospital"                         "CC_number"                       
##   [5] "Sex"                              "Race"                            
##   [7] "age"                              "Length_cm"                       
##   [9] "Weight"                           "bmi"                             
##  [11] "BMI_quartile"                     "Heart_rate"                      
##  [13] "Blood_Pressure_systolic"          "Systolic_Quartile"               
##  [15] "Diastolic_Quartile"               "Blood_Pressure_diastolic"        
##  [17] "Pres_diag"                        "Conf_Diagn"                      
##  [19] "NYHA"                             "Prev_HF"                         
##  [21] "Prev_MI"                          "Prev_PTCA"                       
##  [23] "Prev_CABG"                        "Pulmonary_disease"               
##  [25] "CVA_TIA"                          "Renail_failure"                  
##  [27] "Peripheral_vessel_disease"        "Hypertension"                    
##  [29] "Known_dislipidemia"               "Diabetes_Mellitus"               
##  [31] "Current_Smoker"                   "Pack_years"                      
##  [33] "Familial_MI"                      "FH_below_60"                     
##  [35] "Heart_Failure"                    "SYNTAX"                          
##  [37] "SYNTAX_quartiles"                 "SYNTAXBIN_classic"               
##  [39] "Added_SYNTAX"                     "TOTALSYNTAX_quartiles"           
##  [41] "TOTAL_SYNTAX"                     "CRP_Quartiles"                   
##  [43] "Lab_CRP"                          "Glucose_Quartiles"               
##  [45] "Lab_glucose"                      "Lab_WBC"                         
##  [47] "Hemoglobin_Quartiles"             "Lab_heamogl"                     
##  [49] "Lab_segm_neutro"                  "Lab__lymp"                       
##  [51] "Lab_mono"                         "Lab_tot_chol"                    
##  [53] "Lab_HDL_chol"                     "Lab_trigly"                      
##  [55] "LDL_Quartiles"                    "Lab_LDL_chol"                    
##  [57] "Angiog_descr_domin"               "Angiog_num_diss_vesse"           
##  [59] "Total_occlus"                     "Collaterals"                     
##  [61] "Prev_bypass"                      "Prev_stent"                      
##  [63] "Prev_PTCA_angio"                  "Resid_sten"                      
##  [65] "Death"                            "Angina"                          
##  [67] "Myocard_inf"                      "Sec_perc_interv"                 
##  [69] "Coro_bypass"                      "Cerbr_acci"                      
##  [71] "Arrythmia_teat"                   "FW_Death"                        
##  [73] "Angina_pectoris_fu"               "Myocard_infarc_fu"               
##  [75] "Sec_perc_interv_fu"               "Coronary_bypass_fu"              
##  [77] "Cerebro_vasc_accid"               "Arrythmia"                       
##  [79] "Blood_pressure_systolic_FU"       "Blood_pressure_diastolic_FU"     
##  [81] "Death_date"                       "Cardiac_cause"                   
##  [83] "event_score"                      "MACE_dichotomous"                
##  [85] "pe_betablock"                     "pe_calcium"                      
##  [87] "pe_nitrate"                       "pe_ASAacetylz"                   
##  [89] "pe_thromb"                        "pe_ADPclopidog"                  
##  [91] "pe_ezeoverchol"                   "pe_ACE"                          
##  [93] "pe_ATrecep"                       "pe_diurectica"                   
##  [95] "pe_statines"                      "pe_antiarrhy"                    
##  [97] "pe_other"                         "ap_nitrate"                      
##  [99] "ap_ASAacetyl"                     "ap_clopido"                      
## [101] "dis_betablock"                    "dis_ASAacetyl"                   
## [103] "dis_ADPclopido"                   "dis_ACEinhi"                     
## [105] "dis_statines"                     "dis_othermed"                    
## [107] "Abs_cell_count_classical"         "Classic_Quartile"                
## [109] "Classic_Hi_Lo"                    "Abs_cell_count_Intmed"           
## [111] "Intermediate_Quartile"            "IntMed_Hi_Lo"                    
## [113] "Abs_cell_count_nonclass"          "Non_Classic_Quartile"            
## [115] "Non_Classic_Hi_Lo"                "Class_perc_of_tot_monocyte_count"
## [117] "Class_Perc_Quartile"              "IntMed_perc"                     
## [119] "Int_Perc_Quartile"                "Non_Class_Perc"                  
## [121] "Non_Class_Perc_Quartile"          "hsa_let_7f"                      
## [123] "hsa_let_7g"                       "hsa_let_7i"                      
## [125] "hsa_miR_103"                      "hsa_miR_106b"                    
## [127] "hsa_miR_124"                      "hsa_miR_126"                     
## [129] "hsa_miR_130b"                     "hsa_miR_138"                     
## [131] "hsa_miR_139_5p"                   "hsa_miR_140_5p"                  
## [133] "hsa_miR_142_3p"                   "hsa_miR_142_5p"                  
## [135] "hsa_miR_145"                      "hsa_miR_146a"                    
## [137] "hsa_miR_150"                      "hsa_miR_155"                     
## [139] "hsa_miR_15b"                      "hsa_miR_181a"                    
## [141] "hsa_miR_193a_5p"                  "hsa_miR_197"                     
## [143] "hsa_miR_19a"                      "hsa_miR_19b"                     
## [145] "hsa_miR_20a"                      "hsa_miR_20b"                     
## [147] "hsa_miR_21"                       "hsa_miR_212"                     
## [149] "hsa_miR_221"                      "hsa_miR_223"                     
## [151] "hsa_miR_26b"                      "hsa_miR_27a"                     
## [153] "hsa_miR_27b"                      "hsa_miR_28_5p"                   
## [155] "hsa_miR_29a"                      "hsa_miR_342_3p"                  
## [157] "hsa_miR_365"                      "hsa_miR_376c"                    
## [159] "hsa_miR_424"                      "hsa_miR_450b_5p"                 
## [161] "hsa_miR_451"                      "hsa_miR_486_3p"                  
## [163] "hsa_miR_574_3p"                   "hsa_miR_590_5p"                  
## [165] "hsa_miR_93"                       "hsa_miR_98"                      
## [167] "hsa_let_7b"                       "hsa_let_7d"                      
## [169] "hsa_let_7e"                       "hsa_miR_1"                       
## [171] "hsa_miR_100"                      "hsa_miR_10a"                     
## [173] "hsa_miR_125a_5p"                  "hsa_miR_125b"                    
## [175] "hsa_miR_128"                      "hsa_miR_130a"                    
## [177] "hsa_miR_132"                      "hsa_miR_133a"                    
## [179] "hsa_miR_133b"                     "hsa_miR_146b_5p"                 
## [181] "hsa_miR_17"                       "hsa_miR_18b"                     
## [183] "hsa_miR_191"                      "hsa_miR_193b"                    
## [185] "hsa_miR_195"                      "hsa_miR_200a"                    
## [187] "hsa_miR_210"                      "hsa_miR_218"                     
## [189] "hsa_miR_26a"                      "hsa_miR_28_3p"                   
## [191] "hsa_miR_31"                       "hsa_miR_32"                      
## [193] "hsa_miR_328"                      "hsa_miR_331_3p"                  
## [195] "hsa_miR_34a"                      "hsa_miR_345"                     
## [197] "hsa_miR_374b"                     "hsa_miR_422a"                    
## [199] "hsa_miR_423_5p"                   "hsa_miR_449a"                    
## [201] "hsa_miR_449b"                     "hsa_miR_487b"                    
## [203] "hsa_miR_491_5p"                   "hsa_miR_500"                     
## [205] "hsa_miR_501_5p"                   "hsa_miR_503"                     
## [207] "hsa_miR_628_5p"                   "hsa_miR_671_3p"                  
## [209] "hsa_miR_708"                      "hsa_miR_885_5p"                  
## [211] "hsa_miR_92a"                      "hsa_miR_99a"                     
## [213] "hsa_miR_99b"

##Split into responce and NGC expression Split data to clinical parameters and NGC expression

res <- ctmmMir[,1:121]
row.names(res) <- ctmmMir$CC_number
ftrExprs <- ctmmMir[,122:213]
row.names(ftrExprs) <- ctmmMir$CC_number
head(names(res))
## [1] "Sorting.index"    "Numeric_Pat_code" "Hospital"        
## [4] "CC_number"        "Sex"              "Race"
head(names(ftrExprs))
## [1] "hsa_let_7f"   "hsa_let_7g"   "hsa_let_7i"   "hsa_miR_103" 
## [5] "hsa_miR_106b" "hsa_miR_124"

##Descriptives of the data

Missing Values (Only one missing for confirmed diagnosis)

numNA <- sapply(ctmmMir, function(x) sum(is.na(x)))
numNA[!(numNA == 0)]
##                   Length_cm                      Weight 
##                           8                           6 
##                         bmi                BMI_quartile 
##                           8                           8 
##                  Heart_rate     Blood_Pressure_systolic 
##                           5                           8 
##           Systolic_Quartile          Diastolic_Quartile 
##                           8                           8 
##    Blood_Pressure_diastolic                Hypertension 
##                           8                           1 
##          Known_dislipidemia           Diabetes_Mellitus 
##                           1                           2 
##              Current_Smoker                  Pack_years 
##                           1                          50 
##                 Familial_MI               Heart_Failure 
##                          10                           2 
##                      SYNTAX            SYNTAX_quartiles 
##                         179                         179 
##           SYNTAXBIN_classic                Added_SYNTAX 
##                         179                         331 
##       TOTALSYNTAX_quartiles                TOTAL_SYNTAX 
##                         179                         179 
##               CRP_Quartiles                     Lab_CRP 
##                         123                         123 
##           Glucose_Quartiles                 Lab_glucose 
##                           3                           3 
##        Hemoglobin_Quartiles                 Lab_heamogl 
##                          19                          19 
##             Lab_segm_neutro                   Lab__lymp 
##                          99                          99 
##                    Lab_mono                Lab_tot_chol 
##                         100                           3 
##                Lab_HDL_chol                  Lab_trigly 
##                           3                           6 
##               LDL_Quartiles                Lab_LDL_chol 
##                           3                           3 
##          Angiog_descr_domin       Angiog_num_diss_vesse 
##                          10                          10 
##                Total_occlus                 Collaterals 
##                          37                          38 
##                 Prev_bypass                  Prev_stent 
##                          36                          36 
##             Prev_PTCA_angio                  Resid_sten 
##                          38                          78 
##                       Death                      Angina 
##                          10                          11 
##                 Myocard_inf             Sec_perc_interv 
##                          11                          13 
##                 Coro_bypass                  Cerbr_acci 
##                          11                          11 
##              Arrythmia_teat                    FW_Death 
##                          11                          40 
##          Angina_pectoris_fu           Myocard_infarc_fu 
##                          42                          42 
##          Sec_perc_interv_fu          Coronary_bypass_fu 
##                          42                          42 
##          Cerebro_vasc_accid                   Arrythmia 
##                          42                          42 
##  Blood_pressure_systolic_FU Blood_pressure_diastolic_FU 
##                         258                         258 
##                 event_score            MACE_dichotomous 
##                          21                          11 
##                pe_betablock                  pe_calcium 
##                          14                          14 
##                  pe_nitrate               pe_ASAacetylz 
##                          14                          14 
##                   pe_thromb              pe_ADPclopidog 
##                          14                          14 
##              pe_ezeoverchol                      pe_ACE 
##                          14                          14 
##                  pe_ATrecep               pe_diurectica 
##                          14                          14 
##                 pe_statines                pe_antiarrhy 
##                          14                          14 
##                    pe_other                  ap_nitrate 
##                          14                          14 
##                ap_ASAacetyl                  ap_clopido 
##                          15                          15 
##               dis_betablock               dis_ASAacetyl 
##                          14                          14 
##              dis_ADPclopido                 dis_ACEinhi 
##                          14                          14 
##                dis_statines                dis_othermed 
##                          14                          14 
##                  hsa_let_7f                 hsa_miR_124 
##                           1                          15 
##                 hsa_miR_138              hsa_miR_139_5p 
##                         139                          13 
##              hsa_miR_142_3p             hsa_miR_193a_5p 
##                           1                           1 
##                 hsa_miR_212                 hsa_miR_223 
##                          10                           1 
##                 hsa_miR_27b               hsa_miR_28_5p 
##                           1                           1 
##                hsa_miR_376c                 hsa_miR_424 
##                          61                          20 
##             hsa_miR_450b_5p                 hsa_miR_451 
##                          18                         110 
##              hsa_miR_486_3p                  hsa_miR_98 
##                           2                           1 
##                   hsa_miR_1                 hsa_miR_100 
##                         214                           1 
##                 hsa_miR_10a             hsa_miR_125a_5p 
##                          30                          20 
##                hsa_miR_125b                hsa_miR_130a 
##                          16                           1 
##                hsa_miR_133a                hsa_miR_133b 
##                         113                         309 
##                 hsa_miR_18b                hsa_miR_193b 
##                         176                           9 
##                hsa_miR_200a                 hsa_miR_210 
##                          33                          12 
##                 hsa_miR_218                  hsa_miR_31 
##                         158                         117 
##                  hsa_miR_32                 hsa_miR_34a 
##                           3                          23 
##                hsa_miR_374b                hsa_miR_449a 
##                           7                         134 
##                hsa_miR_449b                hsa_miR_487b 
##                         116                         127 
##              hsa_miR_491_5p                 hsa_miR_500 
##                           1                           4 
##                 hsa_miR_503                 hsa_miR_708 
##                         148                          11 
##              hsa_miR_885_5p                 hsa_miR_99a 
##                           3                           1 
##                 hsa_miR_99b 
##                          37

Different types of variables (continuous, categrical or descriptive)

levelsVar <- sapply(X = res, FUN = function(x) { nlevels(factor(x))})


catVar <- levelsVar <10
numVar <- !catVar &  sapply(X = res, FUN = function(x) { is.numeric(x)})
charVar <- !(catVar | numVar)

Categrical variables are:

names(res)[catVar]
##  [1] "Hospital"                  "Sex"                      
##  [3] "Race"                      "BMI_quartile"             
##  [5] "Systolic_Quartile"         "Diastolic_Quartile"       
##  [7] "Pres_diag"                 "Conf_Diagn"               
##  [9] "NYHA"                      "Prev_HF"                  
## [11] "Prev_MI"                   "Prev_PTCA"                
## [13] "Prev_CABG"                 "Pulmonary_disease"        
## [15] "CVA_TIA"                   "Renail_failure"           
## [17] "Peripheral_vessel_disease" "Hypertension"             
## [19] "Known_dislipidemia"        "Diabetes_Mellitus"        
## [21] "Current_Smoker"            "Familial_MI"              
## [23] "FH_below_60"               "Heart_Failure"            
## [25] "SYNTAX_quartiles"          "SYNTAXBIN_classic"        
## [27] "TOTALSYNTAX_quartiles"     "CRP_Quartiles"            
## [29] "Glucose_Quartiles"         "Hemoglobin_Quartiles"     
## [31] "LDL_Quartiles"             "Angiog_descr_domin"       
## [33] "Angiog_num_diss_vesse"     "Total_occlus"             
## [35] "Collaterals"               "Prev_bypass"              
## [37] "Prev_stent"                "Prev_PTCA_angio"          
## [39] "Resid_sten"                "Death"                    
## [41] "Angina"                    "Myocard_inf"              
## [43] "Sec_perc_interv"           "Coro_bypass"              
## [45] "Cerbr_acci"                "Arrythmia_teat"           
## [47] "FW_Death"                  "Angina_pectoris_fu"       
## [49] "Myocard_infarc_fu"         "Sec_perc_interv_fu"       
## [51] "Coronary_bypass_fu"        "Cerebro_vasc_accid"       
## [53] "Arrythmia"                 "Death_date"               
## [55] "Cardiac_cause"             "event_score"              
## [57] "MACE_dichotomous"          "pe_betablock"             
## [59] "pe_calcium"                "pe_nitrate"               
## [61] "pe_ASAacetylz"             "pe_thromb"                
## [63] "pe_ADPclopidog"            "pe_ezeoverchol"           
## [65] "pe_ACE"                    "pe_ATrecep"               
## [67] "pe_diurectica"             "pe_statines"              
## [69] "pe_antiarrhy"              "pe_other"                 
## [71] "ap_nitrate"                "ap_ASAacetyl"             
## [73] "ap_clopido"                "dis_betablock"            
## [75] "dis_ASAacetyl"             "dis_ADPclopido"           
## [77] "dis_ACEinhi"               "dis_statines"             
## [79] "dis_othermed"              "Classic_Quartile"         
## [81] "Classic_Hi_Lo"             "Intermediate_Quartile"    
## [83] "IntMed_Hi_Lo"              "Non_Classic_Quartile"     
## [85] "Non_Classic_Hi_Lo"         "Class_Perc_Quartile"      
## [87] "Int_Perc_Quartile"         "Non_Class_Perc_Quartile"

Continuous variables are:

names(res)[numVar]
##  [1] "Sorting.index"                    "Numeric_Pat_code"                
##  [3] "CC_number"                        "age"                             
##  [5] "Length_cm"                        "Weight"                          
##  [7] "bmi"                              "Heart_rate"                      
##  [9] "Blood_Pressure_systolic"          "Blood_Pressure_diastolic"        
## [11] "Pack_years"                       "SYNTAX"                          
## [13] "Added_SYNTAX"                     "TOTAL_SYNTAX"                    
## [15] "Lab_CRP"                          "Lab_glucose"                     
## [17] "Lab_WBC"                          "Lab_heamogl"                     
## [19] "Lab_segm_neutro"                  "Lab__lymp"                       
## [21] "Lab_mono"                         "Lab_tot_chol"                    
## [23] "Lab_HDL_chol"                     "Lab_trigly"                      
## [25] "Lab_LDL_chol"                     "Blood_pressure_systolic_FU"      
## [27] "Blood_pressure_diastolic_FU"      "Abs_cell_count_classical"        
## [29] "Abs_cell_count_Intmed"            "Abs_cell_count_nonclass"         
## [31] "Class_perc_of_tot_monocyte_count" "IntMed_perc"                     
## [33] "Non_Class_Perc"

Descriptive variables are:

names(res)[charVar]
## character(0)

Tables of categorical variables

catTables <- sapply(res[catVar], function(x) table(as.factor(x)))
lapply(catTables, function(x) x)
## $Hospital
## 
##   1   2   3   4 
##  79 122  97  98 
## 
## $Sex
## 
##   0   1 
## 110 286 
## 
## $Race
## 
##   1   2   3   4 
## 379   7   4   6 
## 
## $BMI_quartile
## 
##   1   2   3   4 
##  97 101  99  91 
## 
## $Systolic_Quartile
## 
##   1   2   3   4 
##  85 101 105  97 
## 
## $Diastolic_Quartile
## 
##   1   2   3   4 
## 104  76  95 113 
## 
## $Pres_diag
## 
##   0   1   2   3 
##  10 312  30  44 
## 
## $Conf_Diagn
## 
##   0   1   2   3   9 
##  10 305  31  37  13 
## 
## $NYHA
## 
##   1   2   3   4 
## 276  78  27  15 
## 
## $Prev_HF
## 
##   0   1 
## 386  10 
## 
## $Prev_MI
## 
##   0   1   2   9 
## 284  98  12   2 
## 
## $Prev_PTCA
## 
##   0   1   2   9 
## 256 106  28   6 
## 
## $Prev_CABG
## 
##   0   1   2 
## 362  33   1 
## 
## $Pulmonary_disease
## 
##   0   1 
## 350  46 
## 
## $CVA_TIA
## 
##   0   1 
## 369  27 
## 
## $Renail_failure
## 
##   0   1 
## 390   6 
## 
## $Peripheral_vessel_disease
## 
##   0   1   2   3   4   9 
## 341  28  10   7   2   8 
## 
## $Hypertension
## 
##   0   1 
## 151 244 
## 
## $Known_dislipidemia
## 
##   0   1 
## 143 252 
## 
## $Diabetes_Mellitus
## 
##   0   1 
## 308  86 
## 
## $Current_Smoker
## 
##   0   1 
## 316  79 
## 
## $Familial_MI
## 
##   0   1 
## 234 152 
## 
## $FH_below_60
## 
##   0   1 
## 232 164 
## 
## $Heart_Failure
## 
##   0   2   3 
## 392   1   1 
## 
## $SYNTAX_quartiles
## 
##  1  2  3  4 
## 56 51 56 54 
## 
## $SYNTAXBIN_classic
## 
##   1   2   3 
## 182  27   8 
## 
## $TOTALSYNTAX_quartiles
## 
##  1  2  3  4 
## 59 49 55 54 
## 
## $CRP_Quartiles
## 
##  1  2  3  4 
## 66 87 83 37 
## 
## $Glucose_Quartiles
## 
##   1   2   3   4 
## 104  95  87 107 
## 
## $Hemoglobin_Quartiles
## 
##  1  2  3  4 
## 94 97 98 88 
## 
## $LDL_Quartiles
## 
##   1   2   3   4 
## 112  98  91  92 
## 
## $Angiog_descr_domin
## 
##   0   1   2 
##  30  48 308 
## 
## $Angiog_num_diss_vesse
## 
##   0   1   2   3   9 
##   8 139 131  83  25 
## 
## $Total_occlus
## 
##   0   1 
## 287  72 
## 
## $Collaterals
## 
##   0   1 
## 295  63 
## 
## $Prev_bypass
## 
##   0   1 
## 330  30 
## 
## $Prev_stent
## 
##   0   1 
## 240 120 
## 
## $Prev_PTCA_angio
## 
##   0   1 
## 337  21 
## 
## $Resid_sten
## 
##   1   2   3 
## 210  59  49 
## 
## $Death
## 
##   0   1 
## 385   1 
## 
## $Angina
## 
##   0   1 
## 374  11 
## 
## $Myocard_inf
## 
##   0   1 
## 382   3 
## 
## $Sec_perc_interv
## 
##   0 
## 383 
## 
## $Coro_bypass
## 
##   0   1 
## 374  11 
## 
## $Cerbr_acci
## 
##   0 
## 385 
## 
## $Arrythmia_teat
## 
##   0   1 
## 382   3 
## 
## $FW_Death
## 
##   0   1 
## 355   1 
## 
## $Angina_pectoris_fu
## 
##   0   1 
## 276  78 
## 
## $Myocard_infarc_fu
## 
##   0   1 
## 350   4 
## 
## $Sec_perc_interv_fu
## 
##   0   1   9 
## 320  25   9 
## 
## $Coronary_bypass_fu
## 
##   0   1   9 
## 334  19   1 
## 
## $Cerebro_vasc_accid
## 
##   0   1 
## 349   5 
## 
## $Arrythmia
## 
##   0   1 
## 342  12 
## 
## $Death_date
## 
##            08/18/2010   2/3/2010 
##        394          1          1 
## 
## $Cardiac_cause
## 
##                definite other cause 
##         394           1           1 
## 
## $event_score
## 
##   0   1   2   3   6   7   8 
## 260  46   4  44   8   1  12 
## 
## $MACE_dichotomous
## 
##   0   1 
## 339  46 
## 
## $pe_betablock
## 
##   0   1 
## 113 269 
## 
## $pe_calcium
## 
##   0   1   2 
## 275  76  31 
## 
## $pe_nitrate
## 
##   0   1 
## 251 131 
## 
## $pe_ASAacetylz
## 
##   0   1 
##  79 303 
## 
## $pe_thromb
## 
##   0   1   2 
## 338  32  12 
## 
## $pe_ADPclopidog
## 
##   0   1 
## 197 185 
## 
## $pe_ezeoverchol
## 
##   0   1   2 
## 376   3   3 
## 
## $pe_ACE
## 
##   0   1 
## 257 125 
## 
## $pe_ATrecep
## 
##   0   1 
## 300  82 
## 
## $pe_diurectica
## 
##   0   1   2   3 
## 299  28  52   3 
## 
## $pe_statines
## 
##   0   1 
##  85 297 
## 
## $pe_antiarrhy
## 
##   0   1 
## 373   9 
## 
## $pe_other
## 
##   0   1 
## 326  56 
## 
## $ap_nitrate
## 
##   0   1 
## 125 257 
## 
## $ap_ASAacetyl
## 
##   0   1 
## 370  11 
## 
## $ap_clopido
## 
##   0   1 
## 347  34 
## 
## $dis_betablock
## 
##   0   1 
##  94 288 
## 
## $dis_ASAacetyl
## 
##   0   1 
##  60 322 
## 
## $dis_ADPclopido
## 
##   0   1 
## 106 276 
## 
## $dis_ACEinhi
## 
##   0   1 
## 179 203 
## 
## $dis_statines
## 
##   0   1 
##  60 322 
## 
## $dis_othermed
## 
##   0   1 
## 326  56 
## 
## $Classic_Quartile
## 
##   1   2   3   4 
##  99  96 104  97 
## 
## $Classic_Hi_Lo
## 
##   0   1 
## 195 201 
## 
## $Intermediate_Quartile
## 
##   1   2   3   4 
##  95 103  97 101 
## 
## $IntMed_Hi_Lo
## 
##   0   1 
## 198 198 
## 
## $Non_Classic_Quartile
## 
##   1   2   3   4 
## 103  91 100 102 
## 
## $Non_Classic_Hi_Lo
## 
##   0   1 
## 194 202 
## 
## $Class_Perc_Quartile
## 
##   1   2   3   4 
##  96 101 108  91 
## 
## $Int_Perc_Quartile
## 
##   1   2   3   4 
##  93 101 101 101 
## 
## $Non_Class_Perc_Quartile
## 
##   1   2   3   4 
##  97 102  98  99

Distributions of numeric variables

numVarGG <- melt(res[,numVar])
## No id variables; using all as measure variables
numDist <- ggplot(data = numVarGG) +
            geom_histogram(mapping = aes(x = value), bins = 15, na.rm = TRUE) +
            facet_wrap(facets = ~variable, ncol = 4, scales = "free")
            
numDist

##Distribution of Expression of NGC

ftrExprsGG <- melt(ftrExprs)
## No id variables; using all as measure variables
ftrDist <- ggplot(data = ftrExprsGG) +
            geom_histogram(mapping = aes(x = value), bins = 15) +
            facet_wrap(facets = ~variable, ncol = 7, scales = "free") +
            theme(
              axis.text.x = element_text(size = 8, angle = 45, hjust = 1, vjust =  1),
              axis.text.y = element_text(size = 8))
ftrDist
## Warning: Removed 2219 rows containing non-finite values (stat_bin).

Box plot of expression of all NGCs

ftrBox <- ggplot(data = ftrExprsGG) +
            geom_boxplot(mapping = aes(x = variable, y = value)) +
            theme(
              axis.text.x = element_text(size = 6, angle = 45, hjust = 1, vjust = 1)
            )
ftrBox
## Warning: Removed 2219 rows containing non-finite values (stat_boxplot).

#Data Imputation for NGC expression

ftrExprsImp <- preProcess(x = ftrExprs, method = "bagImpute")
  
ftrExprs <- predict(object = ftrExprsImp, newdata = ftrExprs)

#Data paraperation for model fitting (Confirmed diagnosis as the responce) ##Remove NA values from the response

All diseased will be put in the same group

ftrExprsNarm <- ftrExprs[ctmmMir$Conf_Diagn %in% 1:3, ]
resNarm <- res[ctmmMir$Conf_Diagn %in% 1:3, ]
#Put the named response into a vector
resNamed <- resNarm$Conf_Diagn
resNamed[resNamed == 1] <- 0
resNamed[resNamed == 2] <- 1
resNamed[resNamed == 3] <- 1
resNamed <- factor(resNamed, levels = c(0,1), labels = c("Stable_CVD", "Unstable_CVD"))
nrow(ftrExprsNarm)
## [1] 373
table(resNamed)
## resNamed
##   Stable_CVD Unstable_CVD 
##          305           68

##Violin plot of features vs response

Violin plot is more handy since the response is categorical. Mean and sd are indicated in red color.

ftrExprsNarmGG <- melt(ftrExprsNarm)
## No id variables; using all as measure variables
ftrExprsNarmGG$response <- resNamed
ftrResScat <- ggplot(data = ftrExprsNarmGG, mapping = aes(x = response, y = value)) +
                geom_violin() +
                stat_summary(fun.data = mean_sdl, geom = "pointrange", size = 0.3, color = "red", fun.args = list(mult = 1)) +
                facet_wrap(facets = ~variable, ncol = 11, scales = "free") +
                theme(
                  axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45)
                )
ftrResScat

##Vacanoplot

ftrParam <- data.frame(Pvalue = -log10(sapply(ftrExprsNarm, function(x) t.test(x ~ resNamed)$p.value)))
ftrParam$FC <- log2(sapply(ftrExprsNarm, function(x) t.test(x ~ resNamed)$estimate[2]/t.test(x ~ resNamed)$estimate[1]))
ftrParam$label <- NA
ftrParam$label[ftrParam$Pvalue > -log10(0.05) & abs(ftrParam$FC) > 0.0125] <- row.names(ftrParam)[ftrParam$Pvalue > -log10(0.05) & abs(ftrParam$FC) > 0.0125]
ftrParam$color <- as.numeric(!is.na(ftrParam$label)) + 1

ggplot(ftrParam) +
  geom_point(mapping = aes(x = FC, y = Pvalue), color =  ftrParam$color) +
  geom_text(mapping = aes(x = FC, y = Pvalue, label = label), color = ftrParam$color, hjust = -0.1, vjust = 0.1, angle = 45, na.rm = TRUE, size = 3) +
  geom_hline(mapping = aes(yintercept = -log10(0.05)), linetype = 2) +
  geom_vline(xintercept = 0.0125, linetype = 2) +
  geom_vline(xintercept = -0.0125, linetype = 2) +
  coord_cartesian(xlim = c(-0.06, 0.06), ylim = c(0, 4)) +
  xlab("log(Fold Change)") +
  ylab("-log(p-value)")

##P-value and Expression Level Does the p-value rely on expression of guidance cues?

ftrParam$meanExprs <- sapply(ftrExprsNarm, mean)

pCut <- 0.2

ftrParam$labelExprs <- NA
ftrParam$labelExprs[ftrParam$Pvalue > -log10(pCut)] <- row.names(ftrParam)[ftrParam$Pvalue > -log10(pCut)]
ftrParam$colorExprs <- as.numeric(!is.na(ftrParam$labelExprs)) + 1

ggplot(ftrParam) +
  geom_point(mapping = aes(x = meanExprs, y = Pvalue), color = ftrParam$colorExprs) +
  geom_text(mapping = aes(x = meanExprs, y = Pvalue, label = labelExprs), color = ftrParam$colorExprs, hjust = -0.1, vjust = 0.1, angle = 45, na.rm = TRUE, size = 3) +
  geom_hline(yintercept = -log10(pCut), linetype = 2) +
  coord_cartesian(ylim = c(0,3.5))

Distribution of -log10 p

hist(ftrParam$Pvalue)

ftrIn <- ftrParam$Pvalue > -log10(pCut)
sum(ftrIn)
## [1] 14
ftrExprsIn <- ftrExprsNarm[,ftrIn]
names(ftrExprsIn)
##  [1] "hsa_let_7f"     "hsa_miR_130b"   "hsa_miR_139_5p" "hsa_miR_145"   
##  [5] "hsa_miR_146a"   "hsa_miR_197"    "hsa_miR_342_3p" "hsa_miR_376c"  
##  [9] "hsa_miR_486_3p" "hsa_let_7e"     "hsa_miR_10a"    "hsa_miR_132"   
## [13] "hsa_miR_133b"   "hsa_miR_885_5p"

##Correlation between features

ftrCorr <- cor(ftrExprsIn)
heatmap(ftrCorr, revC = TRUE, scale = "none")

findCorrelation(ftrCorr, cutoff = 0.8)
## integer(0)

##Filtering by correlations of all features

#Confounding factors

Age and Sex are very common confounding factors in biomarker studies. We examine them also here.

##Examining age

ggplot(resNarm, aes(x = resNamed, y = age)) +
  geom_violin(color = "red") +
  geom_dotplot(binaxis = "y", dotsize = 0.5, stackdir =  "center") +
  geom_text(mapping = aes(x = 1, y = 90, label = paste("p =", round(t.test(resNarm$age ~ resNamed)$p.value, 3))))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

Age is not very different amoung stable and unstable group. Therefore, age is unlikely to be a confounding factor in this study.

ftrExprsNarmGG$age <- resNarm$age
ggplot(ftrExprsNarmGG, mapping = aes(x = age, y = value)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~variable, scales = "free", ncol = 7) +
                theme(
                  axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45, size = 6)
                )

Age doesn’t correlate much with ftr expression, so that age is again not a confounding factor in this modeling. But age will be included anyway to cancel any confounding effect.

##Examining sex

sexRes <- data.frame(sex = factor(resNarm$Sex, 0:1, c("female", "male")))
sexRes$res <- resNamed
table(sexRes)
##         res
## sex      Stable_CVD Unstable_CVD
##   female         79           20
##   male          226           48
chisq.test(table(sexRes))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(sexRes)
## X-squared = 0.19441, df = 1, p-value = 0.6593

Sex is not significantly biased to any group. Sex will also be included anyway to cancell confounding effect.

ftrExprsNarmGG$Sex <- sexRes$sex
ggplot(ftrExprsNarmGG, aes(x = Sex, y = value)) +
  geom_violin() +
  stat_summary(fun.data = mean_sdl, geom = "pointrange", size = 0.3, color = "red", fun.args = list(mult = 1)) +
  facet_wrap(facets = ~variable, ncol = 7, scales = "free") +
  theme(
        axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45)
      )

#Data partition using the samples with a valid confirmed diagnostics, settings of model parameters

##Data partition

set.seed(4000)
trainingSamples <- createDataPartition(y = resNamed, p = 0.8, list = FALSE)
length(trainingSamples)
## [1] 299
resTr <- resNamed[trainingSamples]
resTest <- resNamed[-trainingSamples]

table(resTr)
## resTr
##   Stable_CVD Unstable_CVD 
##          244           55
table(resTest)
## resTest
##   Stable_CVD Unstable_CVD 
##           61           13
#Age and Sex will be included
fea <- cbind(age = as.numeric(resNarm$age), Sex = factor(resNarm$Sex), ftrExprsIn)
fea$Sex <- as.numeric(fea$Sex) -1
feaTr <- fea[trainingSamples, ] 
feaTest <- fea[-trainingSamples, ]



feaTr[1:6, 1:6]
feaTest[1:6, 1:6]

##Model Training parameters (cross validation …)

10 fold cross-validation used

  ctrl = trainControl(method = "repeatedcv", repeats = 10, classProbs = TRUE)

#Model Fitting

##Function for model report

modelReport <- function(model = NULL, obsTr = resTr, obsTest = resTest, newTr = feaTr, newTest = feaTest)
{
  predTr <- predict(object = model, newdata = newTr)
  predTest <- predict(object = model, newdata = newTest)
  predTrProb <- predict(object = model, newdata = newTr, type = "prob")
  predTestProb <- predict(object = model, newdata = newTest, type = "prob")
  
  feaImp <- varImp(model)$importance
  feaImp$fea <- row.names(feaImp)
  colnames(feaImp)[1] <- "Imp"
  feaImp <- feaImp[order(feaImp$Imp, decreasing = T), ]
  feaImp$fea <- factor(feaImp$fea, levels = row.names(feaImp))
  
  feaImpVis <- ggplot(data = feaImp[1:10,], mapping = aes(x = fea, y = Imp)) +
      geom_col() +
      xlab(label = NULL) +
      ylab(label = paste("Variable Importance", model$modelInfo$label)) +
      theme(
        axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45)
      )
    
  cmGG <- expand.grid(y = -1:-12, x = 1:7)
cmGG$label <- c(NA, NA, "Training Set", "Prediction", "Stable_CVD", "Unstable_CVD",
NA, "Training Set", "Accuracy", "Sensitivity", "Specificity", "Kappa",
"Confusion Matrix and Statistics", NA, "Reference", "Stable_CVD", "Training TN", "Training FP",
NA, NA, "Training Accuracy", "Training Sensitivity", "Training Specificity", "Training Kappa",
NA, NA, NA, "Unstable_CVD", "Training FN", "Training TP",
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, "Test Set", "Prediction", "Stable_CVD", "Unstable_CVD",
NA, "Test Set", "Accuracy", "Sensitivity", "Specificity", "Kappa",
NA, NA, "Reference", "Stable_CVD", "Test TN", "Test FP",
NA, NA, "Test Accuracy", "Test Sensitivity", "Test Specificity", "Test Kappa",
NA, NA, NA, "Unstable_CVD", "Test FN", "Test TP",
NA, NA, NA, NA, NA, NA)

cmGG$label[c(5, 16, 53, 64)] <- as.character(model$levels[1])
cmGG$label[c(6, 28, 54, 76)] <- as.character(model$levels[2])

cmTr <- confusionMatrix(data = predTr, reference = obsTr, positive = levels(resTest)[2])
cmTest <- confusionMatrix(data = predTest, reference = obsTest, positive = levels(resTest)[2])

cmGG$label[17] <- cmTr$table[1,1]
cmGG$label[18] <- cmTr$table[2,1]
cmGG$label[29] <- cmTr$table[1,2]
cmGG$label[30] <- cmTr$table[2,2]
cmGG$label[65] <- cmTest$table[1,1]
cmGG$label[66] <- cmTest$table[2,1]
cmGG$label[77] <- cmTest$table[1,2]
cmGG$label[78] <- cmTest$table[2,2]
cmGG$label[21] <- round(cmTr$overall[1], 4)
cmGG$label[22] <- round(cmTr$byClass[1], 4)
cmGG$label[23] <- round(cmTr$byClass[2], 4)
cmGG$label[24] <- round(cmTr$overall[2], 4)
cmGG$label[69] <- round(cmTest$overall[1], 4)
cmGG$label[70] <- round(cmTest$byClass[1], 4)
cmGG$label[71] <- round(cmTest$byClass[2], 4)
cmGG$label[72] <- round(cmTest$overall[2], 4)
  
cmPlot <- ggplot(cmGG) +
  geom_text(aes(x, y, label = label), hjust = 1, na.rm = TRUE) +
  coord_cartesian(xlim = c(0,8), ylim = c(0,-13)) +
  geom_segment(mapping = aes(x = -0.05, y = -2.5, xend = 7.1, yend = -2.5), size = 1) + #Top border
  geom_segment(mapping = aes(x = -0.05, y = -4.5, xend = 7.1, yend = -4.5)) + #Middel border
  geom_segment(mapping = aes(x = -0.05, y = -6.5, xend = 7.1, yend = -6.5), size = 1) + #Bottom border
  geom_segment(mapping = aes(x = 1.1, y = -3.5, xend = 3.1, yend = -3.5)) + #Left reference
  geom_segment(mapping = aes(x = 5.1, y = -3.5, xend = 7.1, yend = -3.5)) + #Right reference
  geom_segment(mapping = aes(x = -0.05, y = -7.5, xend = 2.1, yend = -7.5), size = 1) +
  geom_segment(mapping = aes(x = -0.05, y = -8.5, xend = 2.1, yend = -8.5)) +
  geom_segment(mapping = aes(x = -0.05, y = -12.5, xend = 2.1, yend = -12.5), size = 1) +
  geom_segment(mapping = aes(x = 4.1, y = -7.5, xend = 6.1, yend = -7.5), size = 1) +
  geom_segment(mapping = aes(x = 4.1, y = -8.5, xend = 6.1, yend = -8.5)) +
  geom_segment(mapping = aes(x = 4.1, y = -12.5, xend = 6.1, yend = -12.5), size = 1) +
  
 theme(line = element_blank(),
       text = element_blank(),
       title = element_blank(),
       plot.background = element_blank(),
       panel.background = element_blank()
       )
  
  rocCurve <- roc(response = obsTest, predictor = predTestProb[,1])
  
  return(list(
              model,
              feaImpVis,
              cmTr,
              cmTest,
              plot(rocCurve, legacy.axes = TRUE),
              cmPlot
              )
         )
}

##Boosted Logistic regression model

registerDoParallel(detectCores())
ModelLogit <- train(
    x = feaTr, 
    y = resTr, 
    method = "LogitBoost", 
    preProcess = c("center","scale"), 
    trControl = ctrl,
    metric = "Kappa",
    tuneLength = 20
    )
stopImplicitCluster()
modelReport(model = ModelLogit)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Boosted Logistic Regression 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 270, 268, 268, 270, 268, 268, ... 
## Resampling results across tuning parameters:
## 
##   nIter  Accuracy   Kappa     
##    11    0.7567527  0.05215187
##    21    0.7458762  0.07874250
##    31    0.7360871  0.06892466
##    41    0.7296863  0.04195305
##    51    0.7217468  0.02398651
##    61    0.7272785  0.03630190
##    71    0.7211835  0.04058436
##    81    0.7139444  0.01309314
##    91    0.7240945  0.02709477
##   101    0.7297887  0.04456887
##   111    0.7330423  0.04893949
##   121    0.7304968  0.04451947
##   131    0.7253726  0.02231876
##   141    0.7210879  0.01883440
##   151    0.7287942  0.03170506
##   161    0.7300393  0.04135351
##   171    0.7311850  0.03346902
##   181    0.7318050  0.03199882
##   191    0.7302503  0.02815371
##   201    0.7342733  0.04654186
## 
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was nIter = 21.
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          236           30
##   Unstable_CVD          8           25
##                                           
##                Accuracy : 0.8729          
##                  95% CI : (0.8298, 0.9085)
##     No Information Rate : 0.8161          
##     P-Value [Acc > NIR] : 0.0052771       
##                                           
##                   Kappa : 0.4991          
##                                           
##  Mcnemar's Test P-Value : 0.0006577       
##                                           
##             Sensitivity : 0.45455         
##             Specificity : 0.96721         
##          Pos Pred Value : 0.75758         
##          Neg Pred Value : 0.88722         
##              Prevalence : 0.18395         
##          Detection Rate : 0.08361         
##    Detection Prevalence : 0.11037         
##       Balanced Accuracy : 0.71088         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           51            8
##   Unstable_CVD         10            5
##                                          
##                Accuracy : 0.7568         
##                  95% CI : (0.6431, 0.849)
##     No Information Rate : 0.8243         
##     P-Value [Acc > NIR] : 0.9486         
##                                          
##                   Kappa : 0.2081         
##                                          
##  Mcnemar's Test P-Value : 0.8137         
##                                          
##             Sensitivity : 0.38462        
##             Specificity : 0.83607        
##          Pos Pred Value : 0.33333        
##          Neg Pred Value : 0.86441        
##              Prevalence : 0.17568        
##          Detection Rate : 0.06757        
##    Detection Prevalence : 0.20270        
##       Balanced Accuracy : 0.61034        
##                                          
##        'Positive' Class : Unstable_CVD   
##                                          
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6885
## 
## [[6]]

##Linear Discriminant Analysis

ModelLda <- train(
    x = feaTr, 
    y = resTr, 
    method = "lda", 
    preProcess = c("center","scale"), 
    metric = "Kappa",
    trControl = ctrl
    )
modelReport(model = ModelLda)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Linear Discriminant Analysis 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 269, 269, 270, 270, 270, 268, ... 
## Resampling results:
## 
##   Accuracy   Kappa     
##   0.8161772  0.07052771
## 
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          242           51
##   Unstable_CVD          2            4
##                                           
##                Accuracy : 0.8227          
##                  95% CI : (0.7746, 0.8643)
##     No Information Rate : 0.8161          
##     P-Value [Acc > NIR] : 0.4173          
##                                           
##                   Kappa : 0.0985          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.07273         
##             Specificity : 0.99180         
##          Pos Pred Value : 0.66667         
##          Neg Pred Value : 0.82594         
##              Prevalence : 0.18395         
##          Detection Rate : 0.01338         
##    Detection Prevalence : 0.02007         
##       Balanced Accuracy : 0.53227         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           59           12
##   Unstable_CVD          2            1
##                                          
##                Accuracy : 0.8108         
##                  95% CI : (0.703, 0.8925)
##     No Information Rate : 0.8243         
##     P-Value [Acc > NIR] : 0.68580        
##                                          
##                   Kappa : 0.0633         
##                                          
##  Mcnemar's Test P-Value : 0.01616        
##                                          
##             Sensitivity : 0.07692        
##             Specificity : 0.96721        
##          Pos Pred Value : 0.33333        
##          Neg Pred Value : 0.83099        
##              Prevalence : 0.17568        
##          Detection Rate : 0.01351        
##    Detection Prevalence : 0.04054        
##       Balanced Accuracy : 0.52207        
##                                          
##        'Positive' Class : Unstable_CVD   
##                                          
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.7251
## 
## [[6]]

##K nearest Neighbours

ModelKnn <- train(
    x = feaTr, 
    y = resTr, 
    method = "knn", 
    preProcess = c("center","scale"), 
    trControl = ctrl,
    metric = "Kappa",
    tuneLength = 10
    )
modelReport(model = ModelKnn)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## k-Nearest Neighbors 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 269, 268, 270, 270, 268, 268, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa     
##    5  0.8045962  0.03880772
##    7  0.8125577  0.02740495
##    9  0.8182151  0.04088871
##   11  0.8198717  0.03345207
##   13  0.8185269  0.02036498
##   15  0.8185499  0.01853615
##   17  0.8162258  0.00000000
##   19  0.8162258  0.00000000
##   21  0.8162258  0.00000000
##   23  0.8162258  0.00000000
## 
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          243           52
##   Unstable_CVD          1            3
##                                           
##                Accuracy : 0.8227          
##                  95% CI : (0.7746, 0.8643)
##     No Information Rate : 0.8161          
##     P-Value [Acc > NIR] : 0.4173          
##                                           
##                   Kappa : 0.0787          
##                                           
##  Mcnemar's Test P-Value : 6.51e-12        
##                                           
##             Sensitivity : 0.05455         
##             Specificity : 0.99590         
##          Pos Pred Value : 0.75000         
##          Neg Pred Value : 0.82373         
##              Prevalence : 0.18395         
##          Detection Rate : 0.01003         
##    Detection Prevalence : 0.01338         
##       Balanced Accuracy : 0.52522         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           60           13
##   Unstable_CVD          1            0
##                                          
##                Accuracy : 0.8108         
##                  95% CI : (0.703, 0.8925)
##     No Information Rate : 0.8243         
##     P-Value [Acc > NIR] : 0.685800       
##                                          
##                   Kappa : -0.0257        
##                                          
##  Mcnemar's Test P-Value : 0.003283       
##                                          
##             Sensitivity : 0.00000        
##             Specificity : 0.98361        
##          Pos Pred Value : 0.00000        
##          Neg Pred Value : 0.82192        
##              Prevalence : 0.17568        
##          Detection Rate : 0.00000        
##    Detection Prevalence : 0.01351        
##       Balanced Accuracy : 0.49180        
##                                          
##        'Positive' Class : Unstable_CVD   
##                                          
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6885
## 
## [[6]]

##Naive Bayes

nbGrid <- expand.grid(laplace = 1:3, usekernel = c(T,F), adjust = 1:3)
registerDoParallel(detectCores())
ModelNb <- train(
    x = feaTr, 
    y = resTr, 
    method = "naive_bayes", 
    preProcess = c("center","scale"), 
    metric = "Kappa",
    trControl = ctrl, 
    tuneGrid = nbGrid
    )
stopImplicitCluster()
modelReport(model = ModelNb)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Naive Bayes 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 269, 269, 270, 268, 269, 270, ... 
## Resampling results across tuning parameters:
## 
##   laplace  usekernel  adjust  Accuracy   Kappa     
##   1        FALSE      1       0.7796466  0.04972852
##   1        FALSE      2       0.7796466  0.04972852
##   1        FALSE      3       0.7796466  0.04972852
##   1         TRUE      1       0.7785091  0.07159505
##   1         TRUE      2       0.8010986  0.04359518
##   1         TRUE      3       0.8134954  0.03316732
##   2        FALSE      1       0.7796466  0.04972852
##   2        FALSE      2       0.7796466  0.04972852
##   2        FALSE      3       0.7796466  0.04972852
##   2         TRUE      1       0.7785091  0.07159505
##   2         TRUE      2       0.8010986  0.04359518
##   2         TRUE      3       0.8134954  0.03316732
##   3        FALSE      1       0.7796466  0.04972852
##   3        FALSE      2       0.7796466  0.04972852
##   3        FALSE      3       0.7796466  0.04972852
##   3         TRUE      1       0.7785091  0.07159505
##   3         TRUE      2       0.8010986  0.04359518
##   3         TRUE      3       0.8134954  0.03316732
## 
## Kappa was used to select the optimal model using the largest value.
## The final values used for the model were laplace = 1, usekernel = TRUE
##  and adjust = 1.
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          242           42
##   Unstable_CVD          2           13
##                                          
##                Accuracy : 0.8528         
##                  95% CI : (0.8075, 0.891)
##     No Information Rate : 0.8161         
##     P-Value [Acc > NIR] : 0.05557        
##                                          
##                   Kappa : 0.3176         
##                                          
##  Mcnemar's Test P-Value : 4.116e-09      
##                                          
##             Sensitivity : 0.23636        
##             Specificity : 0.99180        
##          Pos Pred Value : 0.86667        
##          Neg Pred Value : 0.85211        
##              Prevalence : 0.18395        
##          Detection Rate : 0.04348        
##    Detection Prevalence : 0.05017        
##       Balanced Accuracy : 0.61408        
##                                          
##        'Positive' Class : Unstable_CVD   
##                                          
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           57           12
##   Unstable_CVD          4            1
##                                           
##                Accuracy : 0.7838          
##                  95% CI : (0.6728, 0.8711)
##     No Information Rate : 0.8243          
##     P-Value [Acc > NIR] : 0.85693         
##                                           
##                   Kappa : 0.015           
##                                           
##  Mcnemar's Test P-Value : 0.08012         
##                                           
##             Sensitivity : 0.07692         
##             Specificity : 0.93443         
##          Pos Pred Value : 0.20000         
##          Neg Pred Value : 0.82609         
##              Prevalence : 0.17568         
##          Detection Rate : 0.01351         
##    Detection Prevalence : 0.06757         
##       Balanced Accuracy : 0.50567         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.5851
## 
## [[6]]

##Bagged CART

registerDoParallel(detectCores())
ModelBagCart <- train(
    x = feaTr, 
    y = resTr, 
    method = "treebag", 
    preProcess = c("center","scale"), 
    metric = "Kappa",
    trControl = ctrl
    )
stopImplicitCluster()
modelReport(model = ModelBagCart)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Bagged CART 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 270, 269, 268, 268, 268, 270, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8041531  0.1031237
## 
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          244            0
##   Unstable_CVD          0           55
##                                       
##                Accuracy : 1           
##                  95% CI : (0.9877, 1) 
##     No Information Rate : 0.8161      
##     P-Value [Acc > NIR] : < 2.2e-16   
##                                       
##                   Kappa : 1           
##                                       
##  Mcnemar's Test P-Value : NA          
##                                       
##             Sensitivity : 1.0000      
##             Specificity : 1.0000      
##          Pos Pred Value : 1.0000      
##          Neg Pred Value : 1.0000      
##              Prevalence : 0.1839      
##          Detection Rate : 0.1839      
##    Detection Prevalence : 0.1839      
##       Balanced Accuracy : 1.0000      
##                                       
##        'Positive' Class : Unstable_CVD
##                                       
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           56           12
##   Unstable_CVD          5            1
##                                           
##                Accuracy : 0.7703          
##                  95% CI : (0.6579, 0.8601)
##     No Information Rate : 0.8243          
##     P-Value [Acc > NIR] : 0.9117          
##                                           
##                   Kappa : -0.0064         
##                                           
##  Mcnemar's Test P-Value : 0.1456          
##                                           
##             Sensitivity : 0.07692         
##             Specificity : 0.91803         
##          Pos Pred Value : 0.16667         
##          Neg Pred Value : 0.82353         
##              Prevalence : 0.17568         
##          Detection Rate : 0.01351         
##    Detection Prevalence : 0.08108         
##       Balanced Accuracy : 0.49748         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6236
## 
## [[6]]

##Random Forest

registerDoParallel(detectCores())
ModelRf <- train(
    x = feaTr, 
    y = resTr, 
    method = "rf", 
    preProcess = c("center","scale"), 
    trControl = ctrl,
    metric = "Kappa",
    tuneLength = 10
    )
stopImplicitCluster()
modelReport(model = ModelRf)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Random Forest 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 270, 269, 269, 268, 269, 269, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa     
##    2    0.8232510  0.07005336
##    3    0.8215284  0.07138923
##    5    0.8134364  0.06064995
##    6    0.8124587  0.05910540
##    8    0.8114357  0.06319430
##    9    0.8127912  0.07339022
##   11    0.8091009  0.06477150
##   12    0.8101238  0.07499867
##   14    0.8071009  0.06145380
##   16    0.8081346  0.07594190
## 
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 16.
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          244            0
##   Unstable_CVD          0           55
##                                       
##                Accuracy : 1           
##                  95% CI : (0.9877, 1) 
##     No Information Rate : 0.8161      
##     P-Value [Acc > NIR] : < 2.2e-16   
##                                       
##                   Kappa : 1           
##                                       
##  Mcnemar's Test P-Value : NA          
##                                       
##             Sensitivity : 1.0000      
##             Specificity : 1.0000      
##          Pos Pred Value : 1.0000      
##          Neg Pred Value : 1.0000      
##              Prevalence : 0.1839      
##          Detection Rate : 0.1839      
##    Detection Prevalence : 0.1839      
##       Balanced Accuracy : 1.0000      
##                                       
##        'Positive' Class : Unstable_CVD
##                                       
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           57           12
##   Unstable_CVD          4            1
##                                           
##                Accuracy : 0.7838          
##                  95% CI : (0.6728, 0.8711)
##     No Information Rate : 0.8243          
##     P-Value [Acc > NIR] : 0.85693         
##                                           
##                   Kappa : 0.015           
##                                           
##  Mcnemar's Test P-Value : 0.08012         
##                                           
##             Sensitivity : 0.07692         
##             Specificity : 0.93443         
##          Pos Pred Value : 0.20000         
##          Neg Pred Value : 0.82609         
##              Prevalence : 0.17568         
##          Detection Rate : 0.01351         
##    Detection Prevalence : 0.06757         
##       Balanced Accuracy : 0.50567         
##                                           
##        'Positive' Class : Unstable_CVD    
##                                           
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6828
## 
## [[6]]

##Stochastic Gradient Boosting

gbmGrid <- expand.grid(interaction.depth = c(3, 5),
                       n.trees = c(100, 300, 500),
                       shrinkage = c(0.01, 0.1),
                       n.minobsinnode = c(5, 10))
registerDoParallel(detectCores())
ModelGbm <- train(
    x = feaTr, 
    y = resTr, 
    method = "gbm", 
    preProcess = c("center","scale"), 
    trControl = ctrl,
    metric = "Kappa",
    tuneGrid = gbmGrid
    )
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        0.9519            -nan     0.0100   -0.0001
##      2        0.9495            -nan     0.0100    0.0001
##      3        0.9468            -nan     0.0100    0.0002
##      4        0.9446            -nan     0.0100    0.0005
##      5        0.9432            -nan     0.0100   -0.0002
##      6        0.9413            -nan     0.0100    0.0003
##      7        0.9387            -nan     0.0100    0.0002
##      8        0.9360            -nan     0.0100    0.0000
##      9        0.9337            -nan     0.0100    0.0000
##     10        0.9318            -nan     0.0100   -0.0001
##     20        0.9117            -nan     0.0100    0.0002
##     40        0.8718            -nan     0.0100    0.0005
##     60        0.8410            -nan     0.0100    0.0000
##     80        0.8107            -nan     0.0100   -0.0001
##    100        0.7844            -nan     0.0100    0.0002
##    120        0.7615            -nan     0.0100   -0.0003
##    140        0.7394            -nan     0.0100   -0.0001
##    160        0.7174            -nan     0.0100    0.0001
##    180        0.6995            -nan     0.0100   -0.0003
##    200        0.6819            -nan     0.0100   -0.0000
##    220        0.6627            -nan     0.0100   -0.0000
##    240        0.6468            -nan     0.0100   -0.0004
##    260        0.6316            -nan     0.0100   -0.0002
##    280        0.6152            -nan     0.0100   -0.0004
##    300        0.6000            -nan     0.0100   -0.0003
##    320        0.5859            -nan     0.0100   -0.0002
##    340        0.5722            -nan     0.0100   -0.0003
##    360        0.5592            -nan     0.0100   -0.0003
##    380        0.5484            -nan     0.0100   -0.0007
##    400        0.5370            -nan     0.0100   -0.0002
##    420        0.5266            -nan     0.0100   -0.0001
##    440        0.5159            -nan     0.0100   -0.0000
##    460        0.5054            -nan     0.0100   -0.0001
##    480        0.4959            -nan     0.0100   -0.0002
##    500        0.4863            -nan     0.0100   -0.0003
stopImplicitCluster()
modelReport(model = ModelGbm)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases

## [[1]]
## Stochastic Gradient Boosting 
## 
## 299 samples
##  16 predictor
##   2 classes: 'Stable_CVD', 'Unstable_CVD' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 269, 269, 269, 269, 268, 270, ... 
## Resampling results across tuning parameters:
## 
##   shrinkage  interaction.depth  n.minobsinnode  n.trees  Accuracy 
##   0.01       3                   5              100      0.8162258
##   0.01       3                   5              300      0.8145491
##   0.01       3                   5              500      0.8064112
##   0.01       3                  10              100      0.8162258
##   0.01       3                  10              300      0.8081568
##   0.01       3                  10              500      0.8081461
##   0.01       5                   5              100      0.8169155
##   0.01       5                   5              300      0.8118472
##   0.01       5                   5              500      0.8017093
##   0.01       5                  10              100      0.8162258
##   0.01       5                  10              300      0.8074687
##   0.01       5                  10              500      0.8054557
##   0.10       3                   5              100      0.7787197
##   0.10       3                   5              300      0.7654983
##   0.10       3                   5              500      0.7638877
##   0.10       3                  10              100      0.7825977
##   0.10       3                  10              300      0.7751594
##   0.10       3                  10              500      0.7711809
##   0.10       5                   5              100      0.7870753
##   0.10       5                   5              300      0.7759933
##   0.10       5                   5              500      0.7759473
##   0.10       5                  10              100      0.7911357
##   0.10       5                  10              300      0.7806967
##   0.10       5                  10              500      0.7743052
##   Kappa      
##   0.000000000
##   0.071911789
##   0.082173775
##   0.000000000
##   0.033147587
##   0.073365107
##   0.005853659
##   0.072947095
##   0.068893186
##   0.000000000
##   0.038448072
##   0.068715408
##   0.054473658
##   0.038119087
##   0.039233275
##   0.046602867
##   0.051840044
##   0.049827129
##   0.065503828
##   0.058146645
##   0.061981704
##   0.060132406
##   0.049749924
##   0.043383832
## 
## Kappa was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 500,
##  interaction.depth = 3, shrinkage = 0.01 and n.minobsinnode = 5.
## 
## [[2]]

## 
## [[3]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD          244           34
##   Unstable_CVD          0           21
##                                         
##                Accuracy : 0.8863        
##                  95% CI : (0.8447, 0.92)
##     No Information Rate : 0.8161        
##     P-Value [Acc > NIR] : 0.0006363     
##                                         
##                   Kappa : 0.502         
##                                         
##  Mcnemar's Test P-Value : 1.519e-08     
##                                         
##             Sensitivity : 0.38182       
##             Specificity : 1.00000       
##          Pos Pred Value : 1.00000       
##          Neg Pred Value : 0.87770       
##              Prevalence : 0.18395       
##          Detection Rate : 0.07023       
##    Detection Prevalence : 0.07023       
##       Balanced Accuracy : 0.69091       
##                                         
##        'Positive' Class : Unstable_CVD  
##                                         
## 
## [[4]]
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Stable_CVD Unstable_CVD
##   Stable_CVD           55           12
##   Unstable_CVD          6            1
##                                          
##                Accuracy : 0.7568         
##                  95% CI : (0.6431, 0.849)
##     No Information Rate : 0.8243         
##     P-Value [Acc > NIR] : 0.9486         
##                                          
##                   Kappa : -0.0262        
##                                          
##  Mcnemar's Test P-Value : 0.2386         
##                                          
##             Sensitivity : 0.07692        
##             Specificity : 0.90164        
##          Pos Pred Value : 0.14286        
##          Neg Pred Value : 0.82090        
##              Prevalence : 0.17568        
##          Detection Rate : 0.01351        
##    Detection Prevalence : 0.09459        
##       Balanced Accuracy : 0.48928        
##                                          
##        'Positive' Class : Unstable_CVD   
##                                          
## 
## [[5]]
## 
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
## 
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6772
## 
## [[6]]

#Data summarizations

Comparison of All tested models

Information retrieval

modelBestTune <- function(model = NULL)
{
  bestTuneParam <- as.data.frame(model$bestTune)
  resultTable <- model$results
  n <- ncol(bestTuneParam)
  bestTuneParam <- as.data.frame(bestTuneParam[,names(resultTable)[1:n]])
  
  bestTune <- rep(TRUE, nrow(resultTable))
  for (i in 1:n)
    bestTune <- bestTune & (resultTable[,i] == bestTuneParam[1,i])
  bestModel <- as.matrix(resultTable[bestTune, (n+1):ncol(resultTable)])
  row.names(bestModel) <- model$modelInfo$label
  
  reportBest <- modelReport(model)
  
  bestModel <- cbind(bestModel, 
                    t(as.matrix(reportBest[[3]]$overall[1:2])), 
                    t(as.matrix(reportBest[[3]]$byClass[1:2])),
                    t(as.matrix(reportBest[[4]]$overall[1:2])), 
                    t(as.matrix(reportBest[[4]]$byClass[1:2])),
                    auc = as.numeric(reportBest[[5]]$auc))
  colnames(bestModel)[5:8] <- paste("Tr", colnames(bestModel)[5:8], sep = "_")
  colnames(bestModel)[9:12] <- paste("Test", colnames(bestModel)[9:12], sep = "_")
  return(bestModel)
}


modelList <- mget(ls(pattern = "Model"))
modelSum <- do.call(rbind, lapply(X = modelList, FUN = modelBestTune))
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
modelSum <- data.frame(modelSum)
modelSum$modelName <- row.names(modelSum)

##Names and abbreviations of Models

kable(cbind(Name = row.names(modelSum), Abbreviation = as.character(modelSum$modelName)))
Name Abbreviation
Bagged CART Bagged CART
Stochastic Gradient Boosting Stochastic Gradient Boosting
k-Nearest Neighbors k-Nearest Neighbors
Linear Discriminant Analysis Linear Discriminant Analysis
Boosted Logistic Regression Boosted Logistic Regression
Naive Bayes Naive Bayes
Random Forest Random Forest

##Accuracy in Cross Validation/Training/Test

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = Accuracy, color = "cv"), size = 2) +
  geom_errorbar(mapping = aes(x = modelName, 
              ymax = Accuracy + AccuracySD,
              ymin = Accuracy - AccuracySD,
              color = "cv"
              ), width = 0.3, show.legend = FALSE) +
  geom_point(mapping = aes(x = modelName, y = Tr_Accuracy, color = "Training"), size = 2) +
  geom_point(mapping = aes(x = modelName, y = Test_Accuracy, color = "Test"), size = 2) +
  geom_hline(yintercept = 282/345, linetype = 3 ) +
  xlab("Models") +
  ylab("Accuracy") + 
  coord_flip() +
  theme_bw()

##Kappa

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = Kappa, color = "cv"), size = 2) +
  geom_errorbar(mapping = aes(x = modelName, 
               ymax = Kappa + KappaSD,
              ymin = Kappa - KappaSD,
              color = "cv"
              ), width = 0.3) +
  geom_point(mapping = aes(x = modelName, y = Tr_Kappa, color = "Training"), size = 2) +
  geom_point(mapping = aes(x = modelName, y = Test_Kappa, color = "Test"), size = 2) +
  xlab("Models") +
  ylab("Kappa") + 
  coord_flip() +
  theme_bw()

##Sensitivity

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = Tr_Sensitivity, color = "Sensitivity Training"), size = 2) +
  geom_point(mapping = aes(x = modelName, y = Test_Sensitivity, color = "Sensitivity Test"), size = 2) +
  xlab("Models") +
  ylab("Sensitivity") + 
  coord_flip() +
  theme_bw()

##Specificity

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = Tr_Specificity, color = "Specificity Training"), size = 2) +
  geom_point(mapping = aes(x = modelName, y = Test_Specificity, color = "Specificity Test"), size = 2) +
  xlab("Models") +
  ylab("Specificity") + 
  coord_flip() +
  theme_bw()

##The Specificity-Sensitivity Plot

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = 1 - Tr_Specificity, y = Tr_Sensitivity, color = "Training"), size = 2) +
  geom_point(mapping = aes(x = 1 - Test_Specificity, y = Test_Sensitivity, color = "Test"), size = 2) +
  geom_text(mapping = aes(x = 1 - Tr_Specificity, y = Tr_Sensitivity, color = "Training", label = modelName), size = 3, angle = 45, hjust = 0, show.legend = FALSE) +
  geom_text(mapping = aes(x = 1 - Test_Specificity, y = Test_Sensitivity, color = "Test", label = modelName), size = 3, angle = 45, hjust = 0, show.legend = FALSE) + 
  xlab("1 - Specificity") +
  ylab("Sensitivity") +
  coord_cartesian(xlim = c(0,1)) +
  geom_abline(slope = 1, linetype = 3) +
  theme_bw(
  )

##Area under curve

ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = auc), size = 2) +
  xlab("Models") +
  ylab("Area Under ROC curve") + 
  coord_flip() +
  theme_bw()

#Conclusion None of the performance of the above models is satisfactory, meaning that ftrs are weak predictors of stable and unstable CVD. Gbm performs better than other models, with best Area Under ROC curve and best sensitivity in the test dataset.

#Session info

save.image(file = "./Rdata/Mir_Stable_NonStable_new.RData")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Ubuntu 19.04
## 
## Matrix products: default
## BLAS/LAPACK: /home/huayu/anaconda3/envs/rstudio/lib/R/lib/libRblas.so
## 
## locale:
## [1] en_GB.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] gbm_2.1.5           randomForest_4.6-14 e1071_1.7-2        
##  [4] plyr_1.8.4          ipred_0.9-9         naivebayes_0.9.6   
##  [7] MASS_7.3-51.4       caTools_1.17.1.2    pROC_1.15.0        
## [10] doParallel_1.0.15   iterators_1.0.12    foreach_1.4.7      
## [13] caret_6.0-84        lattice_0.20-38     reshape2_1.4.3     
## [16] ggplot2_3.2.0       knitr_1.23         
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.2          lubridate_1.7.4     class_7.3-15       
##  [4] assertthat_0.2.1    digest_0.6.20       R6_2.4.0           
##  [7] backports_1.1.4     acepack_1.4.1       stats4_3.5.1       
## [10] evaluate_0.14       highr_0.8           pillar_1.4.2       
## [13] rlang_0.4.0         lazyeval_0.2.2      rstudioapi_0.10    
## [16] data.table_1.12.2   rpart_4.1-15        Matrix_1.2-17      
## [19] checkmate_1.9.4     rmarkdown_1.14      labeling_0.3       
## [22] splines_3.5.1       foreign_0.8-71      gower_0.2.1        
## [25] stringr_1.4.0       htmlwidgets_1.3     munsell_0.5.0      
## [28] compiler_3.5.1      xfun_0.8            base64enc_0.1-3    
## [31] pkgconfig_2.0.2     htmltools_0.3.6     nnet_7.3-12        
## [34] tidyselect_0.2.5    htmlTable_1.13.1    tibble_2.1.3       
## [37] gridExtra_2.3       prodlim_2018.04.18  Hmisc_4.2-0        
## [40] codetools_0.2-16    crayon_1.3.4        dplyr_0.8.3        
## [43] withr_2.1.2         bitops_1.0-6        recipes_0.1.6      
## [46] ModelMetrics_1.2.2  grid_3.5.1          jsonlite_1.6       
## [49] nlme_3.1-140        gtable_0.3.0        magrittr_1.5       
## [52] scales_1.0.0        stringi_1.4.3       latticeExtra_0.6-28
## [55] timeDate_3043.102   generics_0.0.2      Formula_1.2-3      
## [58] lava_1.6.5          RColorBrewer_1.1-2  tools_3.5.1        
## [61] glue_1.3.1          purrr_0.3.2         survival_2.44-1.1  
## [64] yaml_2.2.0          colorspace_1.4-1    cluster_2.1.0